#delimit ;
clear all;

/* Load data.
   Each raw worker-task export is imported and immediately written to its
   own tempfile so that later steps can reload and merge them by name. */
tempfile results exante expost page_times instruct;

import delimited "${raw}/worker_task/results_2021-10-01", clear;
save `results', replace;

import delimited "${raw}/worker_task/exante_2021-10-01.csv", clear;
save `exante', replace;

import delimited "${raw}/worker_task/expost_2021-10-01.csv", clear;
save `expost', replace;

import delimited "${raw}/worker_task/PageTimes-2021-10-01.csv", clear;
save `page_times', replace;

import delimited "${raw}/worker_task/instructions_2021-10-01.csv", clear;
save `instruct', replace;


/* Clean screen times.
   Reduces the page-level timing log to one row per participant with the
   total time, the time on comprehension and instruction pages, summary
   statistics of the time per Decision page, and the number of rounds.
   The result overwrites the page_times tempfile, keyed on participantcode
   so it can be merged 1:1 with the other exports. */
use `page_times', clear;

/* Time on a page = difference between its epoch_time and that of the
   previous record of the same participant. The first record of each
   participant has no predecessor and therefore gets a missing value. */
bysort participant_code (epoch_time): gen time_elapsed = epoch_time[_n] - epoch_time[_n-1];

/* Participant-level aggregates. Those with an if-qualifier are filled only
   on the matching rows, the collapse below picks them up via firstnm.
   NOTE(review): regex() is kept exactly as written. The documented function
   name is regexm() - confirm both behave the same in the Stata version used. */
by participant_code: egen time_experiment    = total(time_elapsed);
by participant_code: egen time_comprehension = total(time_elapsed)  if regex(page_name,"Comprehension");
by participant_code: egen time_instructions  = total(time_elapsed)  if page_name == "Instructions";
by participant_code: egen avg_time_round     = mean(time_elapsed)   if page_name == "Decision";
by participant_code: egen max_time_round     = max(time_elapsed)    if page_name == "Decision";
by participant_code: egen p50_time_round     = median(time_elapsed) if page_name == "Decision";
by participant_code: egen min_time_round     = min(time_elapsed)    if page_name == "Decision";
by participant_code: egen nr_rounds          = max(round_number)    if page_name == "Decision";

/* One row per participant: first non-missing value of every summary.
   The time_ wildcard also carries time_elapsed along, for which firstnm
   simply returns the first non-missing page-level value. */
collapse (firstnm) time_* nr_rounds *time_round, by(participant_code);

/* Align the key name with the other exports before saving. */
rename participant_code participantcode;
save `page_times', replace;

/* Assemble the participant-level dataset: start from the instructions
   export and attach results, page times and both questionnaires, all
   matched 1:1 on participantcode. nogen suppresses the _merge variable,
   so unmatched rows from either side are kept silently.
   The use command takes the clear option (replace is not a valid option
   for use and stops the do-file with an error). */
use `instruct', clear;
merge 1:1 participantcode using `results'   , nogen;
merge 1:1 participantcode using `page_times', nogen;
merge 1:1 participantcode using `exante'    , nogen;
merge 1:1 participantcode using `expost'    , nogen;

/* Show how far each participant progressed before filtering. */
tab participant_index_in_pages;

/* Drop sessions that were never started. */
drop if participant_index_in_pages == 0;

/* Drop people who gave up before instructions. */
drop if inlist(participant_index_in_pages,1,3,5);

/* Only keep those who finished the survey (page index 315). This condition
   subsumes the two drops above, which are retained to document the
   reasons for attrition. */
keep if participant_index_in_pages == 315;


/* Rename and define some variables. */

/* Strip the player prefix from every variable that carries it. */
rename player* *;

/* Treatment-arm indicators: 1 when the treatment string contains the tag. */
gen trt_ea1 = (regex(treatment, "EA1"));
gen trt_ep0 = (regex(treatment, "EP0"));
gen trt_ep1 = (regex(treatment, "EP1"));
gen trt_cf0 = (regex(treatment, "CF0"));
gen trt_cf1 = (regex(treatment, "CF1"));

/* Numeric, value-labelled version of the treatment string. */
encode treatment, gen(condition);
rename participantcode id;

/* Demographic indicators. male is left missing for respondents who chose
   Non-binary or preferred not to answer.
   NOTE(review): martial looks like a misspelling of marital. It is kept
   because the name comes from the imported data - confirm the column name. */
gen male    = (gender == "Male") if !inlist(gender,"I prefer not to say", "Non-binary");
gen married = (regex(martial,"Married"));
gen white   = (regex(ethnicity,"White"));

/* The dollar signs in the income categories must be backslash-escaped.
   Unescaped, Stata reads a dollar sign followed by digits as a global
   macro reference and expands it to nothing, so the comparison strings
   would never match and high_income would be 0 for everyone.
   The double space in the first category is intentional and unchanged. */
gen high_income = inlist(income, "\$75,000  to \$99,999", "\$100,000 to \$149,999", "Greater than \$150,000");

gen employed     = (occupation == "Employed for wages (full or part-time)");
gen selfemployed = (occupation == "Self-employed");

/* college is missing when the respondent preferred not to state education. */
gen college = inlist(education, "Associate degree", "Bachelor degree", "MBA degree", "Master degree", "PhD degree", "Professional degree") if education != "I prefer not to say";

/* Restrict to the analysis variables (num_incorrect_compq was listed twice). */
keep id trt_* multiplier num_incorrect_compq show_score
     male age masters married white high_income employed selfemployed
     occupation college education effort confidence
     answer_t answer_m correct_t correct_m answer_t_o answer_m_o
     total_t total_p total_r condition time_* nr_rounds *time_round;

/* Give the raw outcome columns descriptive names.
   The first new name is tasks_perceived (previously misspelled
   tsks_perceived), which matches tasks_perceived_opp and the name used by
   the order command later in this file. */
rename (answer_t answer_m)       (tasks_perceived perceived_multiplier);
rename (answer_t_o answer_m_o)   (tasks_perceived_opp perceived_multiplier_opp);
rename (total_t total_p total_r) (tasks_comp points tasks_attempted);
rename (correct_t correct_m)     (guessed_tasks guessed_multiplier);

/* The raw counter is reduced by one
   (presumably it includes the task still in progress - TODO confirm). */
replace tasks_attempted = tasks_attempted - 1;

/* Attempted tasks can never be fewer than completed tasks. */
replace tasks_attempted = tasks_comp if tasks_comp > tasks_attempted;

/* Derived indicators built from the actual and perceived multipliers. */

/* The multiplier is blanked out for both CF treatment arms. */
replace multiplier = . if inlist(1, trt_cf0, trt_cf1);

/* Threshold dummies. Each one stays missing whenever an input is missing. */
gen low_multiplier                = cond(multiplier != ., multiplier <= 2.5, .);
gen low_perceived_multiplier      = cond(perceived_multiplier != ., perceived_multiplier <= 2.2, .);
gen high_perceived_multiplier_opp = cond(perceived_multiplier_opp != ., perceived_multiplier_opp >= 2, .);
gen opp_higher_multip             = cond(multiplier != . & perceived_multiplier_opp != ., multiplier < perceived_multiplier_opp, .);

/* NOTE(review): 3 is a numeric code assigned by encode of treatment, so it
   depends on the set of treatment strings present - confirm which arm it is. */
gen knows_multiplier = (condition == 3);

/* A missing tasks_comp compares as larger than 5 and therefore yields 0. */
gen below5_tasks = (tasks_comp < 5);

/* Finalize: blank out all variable labels, put the key variables first,
   shrink storage types and write the analysis dataset. */
foreach v of varlist _all {;
    label variable `v' "";
};

order id condition trt_* num_incorrect_compq tasks_comp multiplier age male married white high_income tasks_perceived guessed_tasks perceived_multiplier guessed_multiplier tasks_perceived_opp perceived_multiplier_opp masters;
compress;

/* A forward slash is used as the path separator, as in the import paths
   at the top of the file. It works on every platform Stata runs on,
   whereas a backslash is Windows-only. */
save "${data}/worker.dta", replace;

